Packages

library(dplyr)
library(dygraphs)
library(forecast)
library(h2o)
library(imputeTS)
library(lubridate)
library(plotly)
library(readxl)
library(TSstudio)
## Obtain data
# set working directory to location of downloaded data files
path <- 
"C:/Users/keoka/OneDrive - Texas A&M University/Courses/STAT_684/Project/Data"
setwd(path)
bf_df <- read_excel("APU0000703112.xls") # data series for beef prices
## New names:
## * `` -> ...2
## Examine data
class(bf_df) # table, data frame
## [1] "tbl_df"     "tbl"        "data.frame"
dim(bf_df) # 469 rows, 2 columns
## [1] 469   2
str(bf_df); summary(bf_df) # tibble with two character variables
## tibble [469 x 2] (S3: tbl_df/tbl/data.frame)
##  $ FRED Graph Observations: chr [1:469] "Federal Reserve Economic Data" "Link: https://fred.stlouisfed.org" "Help: https://fredhelp.stlouisfed.org" "Economic Research Division" ...
##  $ ...2                   : chr [1:469] NA NA NA NA ...
##  FRED Graph Observations     ...2          
##  Length:469              Length:469        
##  Class :character        Class :character  
##  Mode  :character        Mode  :character
head(bf_df,10+5); tail(bf_df,5) # view first 5 and last 5 observations
## # A tibble: 15 x 2
##    `FRED Graph Observations`             ...2                                   
##    <chr>                                 <chr>                                  
##  1 Federal Reserve Economic Data         <NA>                                   
##  2 Link: https://fred.stlouisfed.org     <NA>                                   
##  3 Help: https://fredhelp.stlouisfed.org <NA>                                   
##  4 Economic Research Division            <NA>                                   
##  5 Federal Reserve Bank of St. Louis     <NA>                                   
##  6 <NA>                                  <NA>                                   
##  7 APU0000703112                         Average Price: Ground Beef, 100% Beef ~
##  8 <NA>                                  <NA>                                   
##  9 Frequency: Monthly                    <NA>                                   
## 10 observation_date                      APU0000703112                          
## 11 30682                                 1.29                                   
## 12 30713                                 1.3400000000000001                     
## 13 30742                                 1.3080000000000001                     
## 14 30773                                 1.331                                  
## 15 30803                                 1.3009999999999999
## # A tibble: 5 x 2
##   `FRED Graph Observations` ...2              
##   <chr>                     <chr>             
## 1 44501                     4.7160000000000002
## 2 44531                     4.6040000000000001
## 3 44562                     4.5540000000000003
## 4 44593                     4.6299999999999999
## 5 44621                     4.7569999999999997
names(bf_df) # variable names: "FRED Graph Observations" and "...2"
## [1] "FRED Graph Observations" "...2"
## Reformat data
bf_df <- bf_df[11:469,] # exclude description of data set in first 10 rows
names(bf_df) <- c("date","beef price") # rename variables
bf_df[,1] <- as.Date(as.numeric(unlist(bf_df[,1])), # character to numeric date
                  origin = as.Date("1899-12-30")) # align to Excel origin point
bf_df[,2] <- round(as.numeric(unlist(bf_df[,2])),3) # character to numeric values
str(bf_df); summary(bf_df) # tibble with date variable and numeric variable
## tibble [459 x 2] (S3: tbl_df/tbl/data.frame)
##  $ date      : Date[1:459], format: "1984-01-01" "1984-02-01" ...
##  $ beef price: num [1:459] 1.29 1.34 1.31 1.33 1.3 ...
##       date              beef price   
##  Min.   :1984-01-01   Min.   :0.000  
##  1st Qu.:1993-07-16   1st Qu.:1.430  
##  Median :2003-02-01   Median :1.778  
##  Mean   :2003-01-30   Mean   :2.275  
##  3rd Qu.:2012-08-16   3rd Qu.:3.020  
##  Max.   :2022-03-01   Max.   :4.757
which(is.na(bf_df[,2])) # no missing values
## integer(0)
which(bf_df[,2]==0) # missing value was converted to zero value in row 346
## [1] 346
bf_df[341:350,] # view zero value in row 346
## # A tibble: 10 x 2
##    date       `beef price`
##    <date>            <dbl>
##  1 2012-05-01         3.00
##  2 2012-06-01         3.01
##  3 2012-07-01         3.08
##  4 2012-08-01         2.99
##  5 2012-09-01         3.02
##  6 2012-10-01         0   
##  7 2012-11-01         3.18
##  8 2012-12-01         3.08
##  9 2013-01-01         3.41
## 10 2013-02-01         3.38
bf_df[346,2] <- NA # convert zero value back to missing (NA) value
bf_df[341:350,] # view missing value (NA) in row 346
## # A tibble: 10 x 2
##    date       `beef price`
##    <date>            <dbl>
##  1 2012-05-01         3.00
##  2 2012-06-01         3.01
##  3 2012-07-01         3.08
##  4 2012-08-01         2.99
##  5 2012-09-01         3.02
##  6 2012-10-01        NA   
##  7 2012-11-01         3.18
##  8 2012-12-01         3.08
##  9 2013-01-01         3.41
## 10 2013-02-01         3.38
# impute missing value using linear interpolation
bf_df[346,2] <- na_interpolation(ts(bf_df))[346,2]
bf_df[341:350,] # view imputed value in row 346
## # A tibble: 10 x 2
##    date       `beef price`
##    <date>            <dbl>
##  1 2012-05-01         3.00
##  2 2012-06-01         3.01
##  3 2012-07-01         3.08
##  4 2012-08-01         2.99
##  5 2012-09-01         3.02
##  6 2012-10-01         3.10
##  7 2012-11-01         3.18
##  8 2012-12-01         3.08
##  9 2013-01-01         3.41
## 10 2013-02-01         3.38
## Convert data frome to univariate time series object with defined attributes
bf_ts <- ts(data = bf_df[,2], # series values
         start = c(1984,1), # time of first observation: January 1, 1984
         end = c(2022,3), # time of last observation: March 1, 2022
         frequency = 12) # series frequency
ts_info(bf_ts) # ts object with 1 variable and 459 observations
##  The bf_ts series is a ts object with 1 variable and 459 observations
##  Frequency: 12 
##  Start time: 1984 1 
##  End time: 2022 3
## Obtain data
# set working directory to location of downloaded data files
path = 
"C:/Users/keoka/OneDrive - Texas A&M University/Courses/STAT_684/Project/Data"
setwd(path)
ch_df = read_excel("APU0000706111.xls") # data series for chicken prices
## New names:
## * `` -> ...2
## Examine data
class(ch_df) # table, data frame
## [1] "tbl_df"     "tbl"        "data.frame"
dim(ch_df) # 517 rows, 2 columns
## [1] 517   2
str(ch_df); summary(ch_df) # tibble with two character variables
## tibble [517 x 2] (S3: tbl_df/tbl/data.frame)
##  $ FRED Graph Observations: chr [1:517] "Federal Reserve Economic Data" "Link: https://fred.stlouisfed.org" "Help: https://fredhelp.stlouisfed.org" "Economic Research Division" ...
##  $ ...2                   : chr [1:517] NA NA NA NA ...
##  FRED Graph Observations     ...2          
##  Length:517              Length:517        
##  Class :character        Class :character  
##  Mode  :character        Mode  :character
head(ch_df,10+5); tail(ch_df,5) # view first 5 and last 5 observations
## # A tibble: 15 x 2
##    `FRED Graph Observations`             ...2                                   
##    <chr>                                 <chr>                                  
##  1 Federal Reserve Economic Data         <NA>                                   
##  2 Link: https://fred.stlouisfed.org     <NA>                                   
##  3 Help: https://fredhelp.stlouisfed.org <NA>                                   
##  4 Economic Research Division            <NA>                                   
##  5 Federal Reserve Bank of St. Louis     <NA>                                   
##  6 <NA>                                  <NA>                                   
##  7 APU0000706111                         Average Price: Chicken, Fresh, Whole (~
##  8 <NA>                                  <NA>                                   
##  9 Frequency: Monthly                    <NA>                                   
## 10 observation_date                      APU0000706111                          
## 11 29221                                 0.69899999999999995                    
## 12 29252                                 0.67300000000000004                    
## 13 29281                                 0.65500000000000003                    
## 14 29312                                 0.63800000000000001                    
## 15 29342                                 0.628
## # A tibble: 5 x 2
##   `FRED Graph Observations` ...2              
##   <chr>                     <chr>             
## 1 44501                     1.583             
## 2 44531                     1.6060000000000001
## 3 44562                     1.6220000000000001
## 4 44593                     1.6319999999999999
## 5 44621                     1.724
names(ch_df) # variable names: "FRED Graph Observations" and "...2"
## [1] "FRED Graph Observations" "...2"
## Reformat data
ch_df <- ch_df[11:517,] # exclude description of data set in first 10 rows
names(ch_df) <- c("date","chicken price") # rename variables
ch_df[,1] <- as.Date(as.numeric(unlist(ch_df[,1])), # character to numeric date
                  origin = as.Date("1899-12-30")) # align to Excel origin point
ch_df[,2] <- round(as.numeric(unlist(ch_df[,2])),3) # character to numeric values
str(ch_df); summary(ch_df) # tibble with date variable and numeric variable
## tibble [507 x 2] (S3: tbl_df/tbl/data.frame)
##  $ date         : Date[1:507], format: "1980-01-01" "1980-02-01" ...
##  $ chicken price: num [1:507] 0.699 0.673 0.655 0.638 0.628 0.64 0.71 0.751 0.791 0.792 ...
##       date            chicken price  
##  Min.   :1980-01-01   Min.   :0.000  
##  1st Qu.:1990-07-16   1st Qu.:0.884  
##  Median :2001-02-01   Median :1.048  
##  Mean   :2001-01-30   Mean   :1.092  
##  3rd Qu.:2011-08-16   3rd Qu.:1.304  
##  Max.   :2022-03-01   Max.   :1.747
which(is.na(ch_df[,2])) # no missing values
## integer(0)
which(ch_df[,2]==0) # missing value was converted to zero value in row 485
## [1] 485
ch_df[481:490,] # view zero value in row 485
## # A tibble: 10 x 2
##    date       `chicken price`
##    <date>               <dbl>
##  1 2020-01-01            1.41
##  2 2020-02-01            1.36
##  3 2020-03-01            1.4 
##  4 2020-04-01            1.57
##  5 2020-05-01            0   
##  6 2020-06-01            1.75
##  7 2020-07-01            1.71
##  8 2020-08-01            1.61
##  9 2020-09-01            1.54
## 10 2020-10-01            1.58
ch_df[485,2] <- NA # convert zero value back to missing (NA) value
ch_df[481:490,] # view missing value (NA) in row 485
## # A tibble: 10 x 2
##    date       `chicken price`
##    <date>               <dbl>
##  1 2020-01-01            1.41
##  2 2020-02-01            1.36
##  3 2020-03-01            1.4 
##  4 2020-04-01            1.57
##  5 2020-05-01           NA   
##  6 2020-06-01            1.75
##  7 2020-07-01            1.71
##  8 2020-08-01            1.61
##  9 2020-09-01            1.54
## 10 2020-10-01            1.58
# impute missing value using linear interpolation
ch_df[485,2] <- na_interpolation(ts(ch_df))[485,2]
ch_df[481:490,] # view imputed value in row 485
## # A tibble: 10 x 2
##    date       `chicken price`
##    <date>               <dbl>
##  1 2020-01-01            1.41
##  2 2020-02-01            1.36
##  3 2020-03-01            1.4 
##  4 2020-04-01            1.57
##  5 2020-05-01            1.66
##  6 2020-06-01            1.75
##  7 2020-07-01            1.71
##  8 2020-08-01            1.61
##  9 2020-09-01            1.54
## 10 2020-10-01            1.58
## Convert data frome to univariate time series object with defined attributes
ch_ts <- ts(data = ch_df[,2], # series values
         start = c(1980,1), # time of first observation: January 1, 1980
         end = c(2022,3), # time of last observation: March 1, 2022
         frequency = 12) # series frequency
ts_info(ch_ts) # ts object with 1 variable and 507 observations
##  The ch_ts series is a ts object with 1 variable and 507 observations
##  Frequency: 12 
##  Start time: 1980 1 
##  End time: 2022 3

Multivariate time series data for U.S. beef and chicken prices

## Create multivariate time series object
bfch_ts <- ts(data = cbind(bf_df$`beef price`, # series values
                           ch_df$`chicken price`[49:507]),
              # minimum common date
              start = c(year(min(bf_df$date)), month(min(bf_df$date))),
              frequency = 12) # series frequency
ts_info(bfch_ts) # mts object with 2 variable and 459 observations
##  The bfch_ts series is a mts object with 2 variables and 459 observations
##  Frequency: 12 
##  Start time: 1984 1 
##  End time: 2022 3

Exploratory data analysis

## Plot time series with "plot" function from R built-in "graphics" package
plot.ts(bfch_ts, # multivariate time series object
        plot.type = "multiple",
        main = "U.S. Average Price of Beef vs. Chicken",
        ylab = c("Beef","Chicken"),
        xlab = "Years")

plot.ts(bfch_ts, # multivariate time series object
        plot.type = "single",
        main = "U.S. Average Price of Beef vs. Chicken",
        ylab = c("Beef","Chicken"),
        xlab = "Years")

## Plot time series with "ts_plot" function from "TSstudio" package
ts_plot(bfch_ts, # multivariate time series object
        type = "multiple",
        title = "U.S. Average Price of Beef vs. Chicken",
        Ytitle = "Cost per Pound",
        Xtitle = "Years",
        Xgrid = TRUE,
        Ygrid = TRUE)
ts_plot(bfch_ts, # multivariate time series object
        type = "single",
        title = "U.S. Average Price of Beef vs. Chicken",
        Ytitle = "Cost per Pound",
        Xtitle = "Years",
        Xgrid = TRUE,
        Ygrid = TRUE)
## Plot time series with "dygraph" function from "dygraphs" package
dygraph(bfch_ts, # multivariate time series object
        main = "U.S. Average Price of Beef vs. Chicken") %>%
  dyAxis("y",label = "Cost per Pound") %>%
  dySeries("Series 1",axis='y',label = "Ground Beef", color="red") %>%
  dySeries("Series 2",axis = 'y',label = "Whole Chicken",color = "orange") %>%
  dyLegend()